{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 05 How parameters change as data is shifted and scaled" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%%html\n", "" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from pandas import Series, DataFrame\n", "import matplotlib.pyplot as plt\n", "from scipy import stats" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[khanacademy](https://www.khanacademy.org/math/ap-statistics/summarizing-quantitative-data-ap/linear-transformations-data/v/how-parameters-change-as-data-is-shifted-and-scaled)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![How parameters change as data is shifted and scaled fig 1](./imgs/04-05-01.png)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Sample observations; the shifted and scaled columns below are derived from these.\n", "x = np.array([7, 7, 5, 8, 10, 13, 5, 3, 2, 3, 5, 6])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# One column per variant: the raw values, every value shifted by +5,\n", "# and every value scaled by 5.\n", "df = DataFrame({\n", "    'Data': x,\n", "    'Data+5': x + 5,\n", "    'Data*5': x * 5,\n", "})" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DataData+5Data*5
071235
171235
251025
381340
4101550
5131865
651025
73815
82710
93815
1051025
1161130
\n", "
" ], "text/plain": [ " Data Data+5 Data*5\n", "0 7 12 35\n", "1 7 12 35\n", "2 5 10 25\n", "3 8 13 40\n", "4 10 15 50\n", "5 13 18 65\n", "6 5 10 25\n", "7 3 8 15\n", "8 2 7 10\n", "9 3 8 15\n", "10 5 10 25\n", "11 6 11 30" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Work from the raw observation rows only. `df` is later rebound to a frame\n", "# that also holds the summary rows, so slicing here keeps this cell correct\n", "# when it is re-run after that point.\n", "data_df = df.iloc[:len(x)]\n", "# Pick mean/std by label instead of by their position in describe()'s output.\n", "mean_std_df = data_df.describe().loc[['mean', 'std']]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Median and interquartile range for each column, as rows labelled\n", "# 'median' and 'iqr'.\n", "median_iqr_df = DataFrame(\n", "    {col: [np.median(data_df[col]), stats.iqr(data_df[col])]\n", "     for col in data_df.columns},\n", "    index=['median', 'iqr'],\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Rebuild from the raw rows rather than appending to the current `df`, so\n", "# re-running this cell does not stack duplicate summary rows.\n", "df = pd.concat([data_df, mean_std_df, median_iqr_df])" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DataData+5Data*5
07.00000012.00000035.000000
17.00000012.00000035.000000
25.00000010.00000025.000000
38.00000013.00000040.000000
410.00000015.00000050.000000
513.00000018.00000065.000000
65.00000010.00000025.000000
73.0000008.00000015.000000
82.0000007.00000010.000000
93.0000008.00000015.000000
105.00000010.00000025.000000
116.00000011.00000030.000000
mean6.16666711.16666730.833333
std3.1285593.12855915.642793
median5.50000010.50000027.500000
iqr2.7500002.75000013.750000
\n", "
" ], "text/plain": [ " Data Data+5 Data*5\n", "0 7.000000 12.000000 35.000000\n", "1 7.000000 12.000000 35.000000\n", "2 5.000000 10.000000 25.000000\n", "3 8.000000 13.000000 40.000000\n", "4 10.000000 15.000000 50.000000\n", "5 13.000000 18.000000 65.000000\n", "6 5.000000 10.000000 25.000000\n", "7 3.000000 8.000000 15.000000\n", "8 2.000000 7.000000 10.000000\n", "9 3.000000 8.000000 15.000000\n", "10 5.000000 10.000000 25.000000\n", "11 6.000000 11.000000 30.000000\n", "mean 6.166667 11.166667 30.833333\n", "std 3.128559 3.128559 15.642793\n", "median 5.500000 10.500000 27.500000\n", "iqr 2.750000 2.750000 13.750000" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DataData+5Data*5
mean6.16666711.16666730.833333
std3.1285593.12855915.642793
median5.50000010.50000027.500000
iqr2.7500002.75000013.750000
\n", "
" ], "text/plain": [ " Data Data+5 Data*5\n", "mean 6.166667 11.166667 30.833333\n", "std 3.128559 3.128559 15.642793\n", "median 5.500000 10.500000 27.500000\n", "iqr 2.750000 2.750000 13.750000" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show only the last four rows, i.e. the summary rows that follow the raw observations.\n", "df.tail(4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 4 }